# -*- coding:utf-8 -*-
# @Author: shenyuyu
# @Time: 2023/6/25 10:37
# @File: qu_2_countbykey.py

from pyspark import SparkConf, SparkContext


if __name__ == '__main__':
    # Word-count demo: read a text file from HDFS, split each line into
    # words, pair every word with 1, and tally occurrences per word with
    # the countByKey() action.
    conf = SparkConf().setAppName("wc").setMaster("local[*]")

    sc = SparkContext(conf=conf)

    try:
        # Raw lines of the input file (one RDD element per line).
        rdd1 = sc.textFile("hdfs://hadoop1:9820/a.txt")
        print(rdd1.collect())

        # Flatten lines into individual space-separated words.
        rdd2 = rdd1.flatMap(lambda x: x.split(" "))
        print(rdd2.collect())

        # Build (word, 1) pairs so each word carries an initial count.
        rdd3 = rdd2.map(lambda x: (x, 1))
        print(rdd3.collect())

        # countByKey() is an action: it counts how many (key, value) pairs
        # share each key and returns a defaultdict(int) on the driver —
        # equivalent here to reduceByKey(lambda a, b: a + b).collectAsMap().
        # NOTE: collect()/countByKey() pull all data to the driver; fine for
        # a demo, unsafe for large datasets.
        rdd4 = rdd3.countByKey()
        print(rdd4)
    finally:
        # Always release the SparkContext, even if a job above fails
        # (the original script never stopped it).
        sc.stop()
